***** Merge trip info with quote data


clear 
clear matrix
set memory 1000m
set more off
cap log close


cd "/Users/..."

global do_file="‎⁨/Users/.../do_file"
global log_file="/Users/.../log_file"
global raw_data="/Users/.../raw_data⁩⁩"
global working_data="/Users/.../working_data"
global results="/Users/.../results/081519"



log using log_file/merge_quote_drive_081619.log, replace


use working_data/clean_day_level_geo_weather_081519.dta, clear

gen calendar_day=trip_start_date

merge m:1 user_id calendar_day using working_data/quote_day_level_clean_042719.dta


sort user_id calendar_day

gen merge1=1 if _merge==1
replace merge1=0 if merge1==.

gen merge2=1 if _merge==2
replace merge2=0 if merge2==.

gen merge3=1 if _merge==3
replace merge3=0 if merge3==.


bysort user_id: egen temp_merge1=total(merge1)
bysort user_id: egen temp_merge2=total(merge2)
bysort user_id: egen temp_merge3=total(merge3)


count if temp_merge1~=0 & temp_merge2==0 & temp_merge3==0 // only has driving data
count if temp_merge2~=0 & temp_merge1==0 & temp_merge3==0 // only has quote data

drop if temp_merge1~=0 & temp_merge2==0 & temp_merge3==0
drop if temp_merge2~=0 & temp_merge1==0 & temp_merge3==0
// the remaining consumers at least have one driving record and one quote


drop merge1-merge3 temp_merge1-temp_merge3 _merge


bysort user_id: gen temp=_n
count if temp==1
drop temp
// have 3565 customers that have both info

count if order_ins==1
// among them, 502 has purchased insurance


order user_id calendar_day trip_start_date quote_created_date

gen drive_ind=1 if trip_start_date~=""
replace drive_ind=0 if trip_start_date==""

gen quote_ind=1 if quote_created_date~=""
replace quote_ind=0 if quote_created_date==""

order user_id calendar_day drive_ind quote_ind order_ins

count if drive_ind==1  // 382,139 driving record
count if quote_ind==1  // 9034 quote record
count if order_ins==1  // 502 purchase record


*********************************
* drop some quote-level variables
*********************************

drop total_fee-uuid jiaoq_dum-other_ins



********************************************************************************
* For each driving record: generate premium, number of ins quoted, and jiaoq amt
********************************************************************************

gen premium_new=avg_premium
replace premium_new=premium_new[_n-1] if premium_new[_n-1]~=. & user_id==user_id[_n-1] & drive_ind==1

bysort user_id: egen premium_temp=mean(premium_new)
replace premium_new=premium_temp if premium_new==.
drop premium_temp


gen jiaoq_amt_new=jiaoq_amt
replace jiaoq_amt_new=jiaoq_amt_new[_n-1] if jiaoq_amt_new[_n-1]~=. & user_id==user_id[_n-1] & drive_ind==1

bysort user_id: egen jiaoq_temp=mean(jiaoq_amt_new)
replace jiaoq_amt_new=jiaoq_temp if jiaoq_amt_new==.
drop jiaoq_temp	

count if jiaoq_amt_new==0
count if jiaoq_amt_new==.

bysort user_id: egen car_price=mean(purchase_price)

gen temp_engine_desc=substr(engine_desc, 1, 3)
destring temp_engine_desc, force replace
bysort user_id: egen displacement=mean(temp_engine_desc)
drop temp_engine_desc 

* there are two people have positive premium, but zero full_cov_dum
gen full_cov_dum=avg_chesun_dum+avg_sanzhe_dum+avg_daoq_dum+avg_other_ins
bysort user_id: egen full_cov_num=mean(full_cov_dum)

replace full_cov_dum=full_cov_dum[_n-1] if full_cov_dum[_n-1]~=. & user_id==user_id[_n-1] & drive_ind==1
replace full_cov_dum=full_cov_num if full_cov_dum==.
drop full_cov_num


** consider low deductible

gen ld_amt=avg_chesun_bj+avg_sanzhe_bj+avg_daoq_bj
bysort user_id: egen ld_amt_temp=mean(ld_amt)

replace ld_amt=ld_amt[_n-1] if ld_amt[_n-1]~=. & user_id==user_id[_n-1] & drive_ind==1
replace ld_amt=ld_amt_temp if ld_amt==.
drop ld_amt_temp


gen ld_num=avg_chesun_bj_dum+avg_sanzhe_bj_dum+avg_daoq_bj_dum
bysort user_id: egen ld_num_temp=mean(ld_num)

replace ld_num=ld_num[_n-1] if ld_num[_n-1]~=. & user_id==user_id[_n-1] & drive_ind==1
replace ld_num=ld_num_temp if ld_num==.
drop ld_num_temp

gen ld_dum=1 if ld_num>0 & ld_num~=.
replace ld_dum=0 if ld_num==0


save working_data/merge_quote_drive_081619.dta, replace




